## test-wcor.R -  jesus.benitez-baleato@uni-konstanz.de - GPL-v3
##
## Description:   Test weighted correlation function wcor().
##                With population constant, the output of wcor()
##                should be the same as cor()
##
## ##
##
## configure environment
##
library(ggplot2)
options(warn=-1)

working.dir <- getwd()
df.csv <- (
           read.csv(
                    paste0(
                           working.dir,
                           "/data/sublevels-data-testwcor.csv"
                           ),
                    stringsAsFactors=FALSE
                    )
           )

## retrieve case list
## 
countries <- unique(df.csv$country)
prefixes <- unique(df.csv$prefix)


for (country.case in countries) {
  writeLines(
        paste(country.case)
        )
  ## print('   Computing correlations...')

  ## enact empty df
  ##
  df.r <- data.frame(
                     value=rep(0, 4),
                     prefix=rep("",4),
                     trans=rep("",4), 
                     stringsAsFactors=FALSE
                     )
  for (prefix.case in prefixes) {

    df <- subset(
                 df.csv,
                 country==country.case & prefix==prefix.case
                 )
    
    ## retrieve case's data and parameters
    ##
    df$population <- as.numeric(df$population)
    df$centroids <- as.numeric(df$centroids)
    df$connected <- as.numeric(df$connected)
    
    country <- country.case
    year <- unique(df$year)
    month <- unique(df$month)
    census <- unique(df$census)
    granularity <- unique(df$granularity)
    stats <- unique(df$stast)
    prefix <- unique(df$prefix)
    variable <- unique(df$variable)

    df$penetration <- ( df$connected * 100 )/ df$total   ## total = population in test!
    df$centroidspercapita <- (1000*df$centroids)/df$population

    ## compute weighted correlation
    ##
    wcor <- function(
                     x,
                     y,
                     w = rep(1,length(x))
                     ) {
      ## each variable must be a vector of the same length
      stopifnot(length(x) == length(y) && length(x) == length(w))

      ## compute weigthed means
      w.x.mean <- sum(w*x)/sum(w)
      w.y.mean <- sum(w*y)/sum(w)

      ## compute weighted covariances
      w.cov.x <- sum(w * (x - w.x.mean ) * (x - w.x.mean ))/sum(w)
      w.cov.y <- sum(w * (y - w.y.mean ) * (y - w.y.mean ))/sum(w)
      w.cov.xy <- sum(w * (x - w.x.mean ) * (y - w.y.mean ))/sum(w)
      
      ## compute the weighted correlation
      w.cor <- w.cov.xy / sqrt(w.cov.x * w.cov.y)
      return(w.cor)
    }

    df$bivariate.x <- df$penetration
    df$bivariate.y <- df$centroidspercapita

    model.cor <- cor(
                    df$bivariate.x,
                    df$bivariate.y
                    )
    
    model.wcorrelation <- wcor(
                               df$bivariate.x,
                               df$bivariate.y,
                               df$population ## constant in the test dataset
                                             ## population = total in csv! 
                               )
    writeLines(paste('    cor():',model.cor))
    writeLines(paste('   wcor():',model.wcorrelation))
  } # prefixes
} # countries
writeLines('\nDone.')
